
library("rstudioapi")     
##Set working directory
setwd(dirname(getActiveDocumentContext()$path))
#setwd("../")
getwd()
library("readxl") 
require(miceadds)
library(data.table)
library(tidyverse)
library(DescTools)
library(htmlTable)
library(stargazer)
library(xlsx)
library(estimatr)
library(tidyverse)
library(geosphere)
library(DescTools)
require(dplyr)

load("FinalFinal20062023Full1-6.RData")
load("FinalFinalSP20062023Full1-6.RData")

new_data = final_finalV2[which(final_finalV2$p_i == 1),]

combo = new_data

# total number of  Villages
total_num_villages <- combo %>%
  group_by(Q123) %>%
  summarise(n = n()) %>%
  summarise(n = n())

total_num_villages <- total_num_villages[[1]]

# total number of Villages per treatment
num_villages <- combo %>%
  group_by(VillageTreatment, Q123) %>%
  summarise(n = n()) %>%
  summarise(n = n())

villages_cdc <- num_villages$n[num_villages$VillageTreatment == "CDC health"][1]
villages_hc <- num_villages$n[num_villages$VillageTreatment == "High Cash"][1]
villages_lc <- num_villages$n[num_villages$VillageTreatment == "Low Cash"][1]
villages_pla <- num_villages$n[num_villages$VillageTreatment == "Placebo"][1]

#Total number of subjects
subj_total <- nrow(combo)

#Total number of subjects per treatment
subj_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(n = n())

subj_t1 <- subj_treatment$n[subj_treatment$individual_treatment == "Placebo"][1]
subj_t2 <- subj_treatment$n[subj_treatment$individual_treatment == "CDC Health"][1]
subj_t3 <- subj_treatment$n[subj_treatment$individual_treatment == "Low Cash"][1]
subj_t4 <- subj_treatment$n[subj_treatment$individual_treatment == "High Cash"][1]

#Vaccine initial intentions total
v_init_int_total <- combo %>%
  group_by(vaccine_intention) %>%
  summarise(n = n())

vii_total_0 <- v_init_int_total$n[v_init_int_total$vaccine_intention == 0][1]
vii_total_1 <- v_init_int_total$n[v_init_int_total$vaccine_intention == 1][1]/sum(v_init_int_total[which(v_init_int_total$vaccine_intention %in% c(0,1)),]$n)*100

#Vaccine initial intentions per treatment
v_init_int_treatment <- combo %>%
  group_by(individual_treatment, vaccine_intention) %>%
  summarise(n = n())

vii_pla_0 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 0 & v_init_int_treatment$individual_treatment == "Placebo"][1]
vii_pla_1 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 1 & v_init_int_treatment$individual_treatment == "Placebo"][1]/sum(v_init_int_treatment[which(v_init_int_treatment$individual_treatment=="Placebo"&v_init_int_treatment$vaccine_intention %in% c(0,1)),]$n)*100

vii_cdc_0 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 0 & v_init_int_treatment$individual_treatment == "CDC Health"][1]
vii_cdc_1 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 1 & v_init_int_treatment$individual_treatment == "CDC Health"][1]/sum(v_init_int_treatment[which(v_init_int_treatment$individual_treatment=="CDC Health"&v_init_int_treatment$vaccine_intention %in% c(0,1)),]$n)*100

vii_lc_0 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 0 & v_init_int_treatment$individual_treatment == "Low Cash"][1]
vii_lc_1 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 1 & v_init_int_treatment$individual_treatment == "Low Cash"][1]/sum(v_init_int_treatment[which(v_init_int_treatment$individual_treatment=="Low Cash"&v_init_int_treatment$vaccine_intention %in% c(0,1)),]$n)*100

vii_hc_0 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 0 & v_init_int_treatment$individual_treatment == "High Cash"][1]
vii_hc_1 <- v_init_int_treatment$n[v_init_int_treatment$vaccine_intention == 1 & v_init_int_treatment$individual_treatment == "High Cash"][1]/sum(v_init_int_treatment[which(v_init_int_treatment$individual_treatment=="High Cash"&v_init_int_treatment$vaccine_intention %in% c(0,1)),]$n)*100

### SD: 
vii_total_sd <- sd(combo$vaccine_intention, na.rm=TRUE)*100

vii_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(vaccine_intention,  na.rm = TRUE))

vii_pla_sd <- vii_treatment_sd$mean[vii_treatment_sd$individual_treatment == "Placebo"][1]*100
vii_cdc_sd <- vii_treatment_sd$mean[vii_treatment_sd$individual_treatment == "CDC Health"][1]*100
vii_lc_sd <- vii_treatment_sd$mean[vii_treatment_sd$individual_treatment == "Low Cash"][1]*100
vii_hc_sd <- vii_treatment_sd$mean[vii_treatment_sd$individual_treatment == "High Cash"][1]*100

#Vaccine certainty intention total
v_cert_total <- mean(combo$chance_vaccine, na.rm=TRUE)

#Vaccine certainty intention per treatment
v_cert_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(chance_vaccine,  na.rm = TRUE))

v_cert_pla <- v_cert_treatment$mean[v_cert_treatment$individual_treatment == "Placebo"][1]
v_cert_cdc <- v_cert_treatment$mean[v_cert_treatment$individual_treatment == "CDC Health"][1]
v_cert_lc <- v_cert_treatment$mean[v_cert_treatment$individual_treatment == "Low Cash"][1]
v_cert_hc <- v_cert_treatment$mean[v_cert_treatment$individual_treatment == "High Cash"][1]

### SD: 
v_cert_total_sd <- sd(combo$chance_vaccine, na.rm=TRUE)

#Vaccine certainty intention per treatment
v_cert_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(chance_vaccine,  na.rm = TRUE))

v_cert_pla_sd <- v_cert_treatment_sd$mean[v_cert_treatment_sd$individual_treatment == "Placebo"][1]
v_cert_cdc_sd <- v_cert_treatment_sd$mean[v_cert_treatment_sd$individual_treatment == "CDC Health"][1]
v_cert_lc_sd <- v_cert_treatment_sd$mean[v_cert_treatment_sd$individual_treatment == "Low Cash"][1]
v_cert_hc_sd <- v_cert_treatment_sd$mean[v_cert_treatment_sd$individual_treatment == "High Cash"][1]

# code taken from Matze No. 2: 
combo$solar_usage <- as.factor(combo$incentives_4)
combo$solar_usage <- as.numeric(combo$solar_usage)

combo$solar_usage[combo$solar_usage=="1"] <- "0"
combo$solar_usage[combo$solar_usage=="2"] <- "1"
combo$solar_usage <- as.numeric(combo$solar_usage)

combo$chance_solar <- as.numeric(combo$incentives_5)

#Use of solar total
solar_usage_total <- combo %>%
  group_by(solar_usage) %>%
  summarise(n = n())

su_total_0 <- solar_usage_total$n[solar_usage_total$solar_usage == 0][1]
su_total_1 <- solar_usage_total$n[solar_usage_total$solar_usage == 1][1]/sum(solar_usage_total[which(na.omit(solar_usage_total$solar_usage)%in%c(0,1)),]$n)*100

#Use of solar per treatment
solar_usage_treatment <- combo %>%
  group_by(individual_treatment, solar_usage) %>%
  summarise(n = n())

su_pla_0 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 0 & solar_usage_treatment$individual_treatment == "Placebo"][1]
su_pla_1 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 1 & solar_usage_treatment$individual_treatment == "Placebo"][1]/sum(solar_usage_treatment[which(solar_usage_treatment$solar_usage%in%c(0,1) & solar_usage_treatment$individual_treatment=="Placebo"),]$n)*100

su_cdc_0 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 0 & solar_usage_treatment$individual_treatment == "CDC Health"][1]
su_cdc_1 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 1 & solar_usage_treatment$individual_treatment == "CDC Health"][1]/sum(solar_usage_treatment[which(solar_usage_treatment$solar_usage%in%c(0,1) & solar_usage_treatment$individual_treatment=="CDC Health"),]$n)*100


su_lc_0 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 0 & solar_usage_treatment$individual_treatment == "Low Cash"][1]
su_lc_1 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 1 & solar_usage_treatment$individual_treatment == "Low Cash"][1]/sum(solar_usage_treatment[which(solar_usage_treatment$solar_usage%in%c(0,1) & solar_usage_treatment$individual_treatment=="Low Cash"),]$n)*100


su_hc_0 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 0 & solar_usage_treatment$individual_treatment == "High Cash"][1]
su_hc_1 <- solar_usage_treatment$n[solar_usage_treatment$solar_usage == 1 & solar_usage_treatment$individual_treatment == "High Cash"][1]/sum(solar_usage_treatment[which(solar_usage_treatment$solar_usage%in%c(0,1) & solar_usage_treatment$individual_treatment=="High Cash"),]$n)*100


# Standard deviations: 
su_total_sd <- sd(combo$solar_usage, na.rm=TRUE)*100

su_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(solar_usage,  na.rm = TRUE))

su_pla_sd <- su_treatment_sd$mean[su_treatment_sd$individual_treatment == "Placebo"][1]*100
su_cdc_sd <- su_treatment_sd$mean[su_treatment_sd$individual_treatment == "CDC Health"][1]*100
su_lc_sd <- su_treatment_sd$mean[su_treatment_sd$individual_treatment == "Low Cash"][1]*100
su_hc_sd <- su_treatment_sd$mean[su_treatment_sd$individual_treatment == "High Cash"][1]*100

#Chance to buy solar total
buy_solar_total <- mean(combo$chance_solar, na.rm=TRUE)

#Chance to buy solar per treatment
buy_solar_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(chance_solar,  na.rm = TRUE)) 

buy_solar_pla <- buy_solar_treatment$mean[buy_solar_treatment$individual_treatment == "Placebo"][1]
buy_solar_cdc <- buy_solar_treatment$mean[buy_solar_treatment$individual_treatment == "CDC Health"][1]
buy_solar_lc <- buy_solar_treatment$mean[buy_solar_treatment$individual_treatment == "Low Cash"][1]
buy_solar_hc <- buy_solar_treatment$mean[buy_solar_treatment$individual_treatment == "High Cash"][1]

##### SD for buy solar total: 
buy_solar_total_sd <- sd(combo$chance_solar, na.rm=TRUE)

#Chance to buy solar per treatment
buy_solar_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(chance_solar, na.rm = TRUE)) #FIX ME?

buy_solar_pla_sd <- buy_solar_treatment_sd$mean[buy_solar_treatment_sd$individual_treatment == "Placebo"][1]
buy_solar_cdc_sd <- buy_solar_treatment_sd$mean[buy_solar_treatment_sd$individual_treatment == "CDC Health"][1]
buy_solar_lc_sd <- buy_solar_treatment_sd$mean[buy_solar_treatment_sd$individual_treatment == "Low Cash"][1]
buy_solar_hc_sd <- buy_solar_treatment_sd$mean[buy_solar_treatment_sd$individual_treatment == "High Cash"][1]

# % Female total
gender_total <- combo %>%
  group_by(Q2.3) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

female_total <- gender_total$percentage[gender_total$Q2.3 == "Female"][1]

# % Female per treatment
gender_treatment <- combo %>%
  group_by(individual_treatment, Q2.3) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

female_pla <- gender_treatment$percentage[gender_treatment$Q2.3 == "Female" & gender_treatment$individual_treatment == "Placebo"][1]
female_cdc <- gender_treatment$percentage[gender_treatment$Q2.3 == "Female" & gender_treatment$individual_treatment == "CDC Health"][1]
female_lc <- gender_treatment$percentage[gender_treatment$Q2.3 == "Female" & gender_treatment$individual_treatment == "Low Cash"][1]
female_hc <- gender_treatment$percentage[gender_treatment$Q2.3 == "Female" & gender_treatment$individual_treatment == "High Cash"][1]

# SD for Female variable: 
f_tab = combo[,c("individual_treatment", "Q2.3")]
f_tab$Female = ifelse(f_tab$Q2.3=="Female", 1, 0)
female_sd <- f_tab %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Female, na.rm = TRUE))
female_pla_sd <- as.numeric(female_sd[which(female_sd$individual_treatment=="Placebo"),][2])*100
female_cdc_sd <- as.numeric(female_sd[which(female_sd$individual_treatment=="CDC Health"),][2])*100
female_lc_sd <- as.numeric(female_sd[which(female_sd$individual_treatment=="Low Cash"),][2])*100
female_hc_sd <-as.numeric(female_sd[which(female_sd$individual_treatment=="High Cash"),][2])*100
female_total_sd = sd(f_tab$Female, na.rm = TRUE)*100


#Age total
age_total <- mean(combo$Q2.2, na.rm=TRUE)

#Age per treatment
age_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q2.2, na.rm = TRUE))

age_pla <- age_treatment$mean[age_treatment$individual_treatment == "Placebo"][1]
age_cdc <- age_treatment$mean[age_treatment$individual_treatment == "CDC Health"][1]
age_lc <- age_treatment$mean[age_treatment$individual_treatment == "Low Cash"][1]
age_hc <- age_treatment$mean[age_treatment$individual_treatment == "High Cash"][1]

## Age additional SD: 
age_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Q2.2, na.rm = TRUE))
age_pla_sd = as.numeric(na.omit(age_sd$sd[age_sd$individual_treatment == "Placebo"]))
age_cdc_sd = as.numeric(na.omit(age_sd$sd[age_sd$individual_treatment == "CDC Health"]))
age_lc_sd = as.numeric(na.omit(age_sd$sd[age_sd$individual_treatment == "Low Cash"]))
age_hc_sd = as.numeric(na.omit(age_sd$sd[age_sd$individual_treatment == "High Cash"]))
age_total_sd = sd(combo$Q2.2, na.rm=TRUE)

#Household size total
h_size_total <- mean(combo$Q141, na.rm=TRUE)

#Household size per treatment
h_size_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q141, na.rm = TRUE))

h_size_pla <- h_size_treatment$mean[h_size_treatment$individual_treatment == "Placebo"][1]
h_size_cdc <- h_size_treatment$mean[h_size_treatment$individual_treatment == "CDC Health"][1]
h_size_lc <- h_size_treatment$mean[h_size_treatment$individual_treatment == "Low Cash"][1]
h_size_hc <- h_size_treatment$mean[h_size_treatment$individual_treatment == "High Cash"][1]

# NEW added code for SD: 
h_size_total_sd <- sd(combo$Q141, na.rm=TRUE)

#Household size per treatment
h_size_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Q141, na.rm = TRUE))

h_size_pla_sd <- h_size_treatment_sd$sd[h_size_treatment_sd$individual_treatment == "Placebo"][1]
h_size_cdc_sd <- h_size_treatment_sd$sd[h_size_treatment_sd$individual_treatment == "CDC Health"][1]
h_size_lc_sd <- h_size_treatment_sd$sd[h_size_treatment_sd$individual_treatment == "Low Cash"][1]
h_size_hc_sd <- h_size_treatment_sd$sd[h_size_treatment_sd$individual_treatment == "High Cash"][1]

#Children number total
num_children_total <- mean(combo$Q142, na.rm=TRUE)

#Children number per treatment
num_children_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q142, na.rm = TRUE))

num_children_pla <- num_children_treatment$mean[num_children_treatment$individual_treatment == "Placebo"][1]
num_children_cdc <- num_children_treatment$mean[num_children_treatment$individual_treatment == "CDC Health"][1]
num_children_lc <- num_children_treatment$mean[num_children_treatment$individual_treatment == "Low Cash"][1]
num_children_hc <- num_children_treatment$mean[num_children_treatment$individual_treatment == "High Cash"][1]

num_children_total_sd <- sd(combo$Q142, na.rm=TRUE)

num_children_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(Q142, na.rm = TRUE))

num_children_pla_sd <- num_children_treatment_sd$mean[num_children_treatment_sd$individual_treatment == "Placebo"][1]
num_children_cdc_sd <- num_children_treatment_sd$mean[num_children_treatment_sd$individual_treatment == "CDC Health"][1]
num_children_lc_sd <- num_children_treatment_sd$mean[num_children_treatment_sd$individual_treatment == "Low Cash"][1]
num_children_hc_sd <- num_children_treatment_sd$mean[num_children_treatment_sd$individual_treatment == "High Cash"][1]


# % Employed total
employment_total <- combo %>%
  group_by(Q143) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

emp_total_full <- employment_total$percentage[employment_total$Q143 == "Employed (full time)"][1]
emp_total_part <- employment_total$percentage[employment_total$Q143 == "Employed (part time)"][1]
emp_total_unemp <- employment_total$percentage[employment_total$Q143 == "Unemployed"][1]
#Unemployed
# % Employed per treatment
employment_treatment <- combo %>%
  group_by(individual_treatment, Q143) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

emp_pla_full <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (full time)" & employment_treatment$individual_treatment == "Placebo"][1]
emp_pla_part <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (part time)" & employment_treatment$individual_treatment == "Placebo"][1]
emp_pla_unemp <- employment_treatment$percentage[employment_treatment$Q143 == "Unemployed" & employment_treatment$individual_treatment == "Placebo"][1]

emp_cdc_full <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (full time)" & employment_treatment$individual_treatment == "CDC Health"][1]
emp_cdc_part <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (part time)" & employment_treatment$individual_treatment == "CDC Health"][1]
emp_cdc_unemp <- employment_treatment$percentage[employment_treatment$Q143 == "Unemployed" & employment_treatment$individual_treatment == "CDC Health"][1]

emp_lc_full <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (full time)" & employment_treatment$individual_treatment == "Low Cash"][1]
emp_lc_part <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (part time)" & employment_treatment$individual_treatment == "Low Cash"][1]
emp_lc_unemp <- employment_treatment$percentage[employment_treatment$Q143 == "Unemployed" & employment_treatment$individual_treatment == "Low Cash"][1]

emp_hc_full <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (full time)" & employment_treatment$individual_treatment == "High Cash"][1]
emp_hc_part <- employment_treatment$percentage[employment_treatment$Q143 == "Employed (part time)" & employment_treatment$individual_treatment == "High Cash"][1]
emp_hc_unemp <- employment_treatment$percentage[employment_treatment$Q143 == "Unemployed" & employment_treatment$individual_treatment == "High Cash"][1]

w_tab = combo[,c("individual_treatment", "Q143")]
w_tab$Full = ifelse(w_tab$Q143=="Employed (full time)", 1, 0)
w_tab$Part = ifelse(w_tab$Q143=="Employed (part time)", 1, 0)
w_tab$UnE = ifelse(w_tab$Q143=="Unemployed", 1, 0)

full_sd <- w_tab %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Full, na.rm = TRUE))
part_sd <- w_tab %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Part, na.rm = TRUE))
UnE_sd <- w_tab %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(UnE, na.rm = TRUE))

full_pla_sd = as.numeric(full_sd[which(full_sd$individual_treatment=="Placebo"),][2])*100
full_cdc_sd = as.numeric(full_sd[which(full_sd$individual_treatment=="CDC Health"),][2])*100
full_lc_sd = as.numeric(full_sd[which(full_sd$individual_treatment=="Low Cash"),][2])*100
full_hc_sd = as.numeric(full_sd[which(full_sd$individual_treatment=="High Cash"),][2])*100
full_sample_sd = sd(w_tab$Full, na.rm = TRUE)*100

part_pla_sd = as.numeric(part_sd[which(part_sd$individual_treatment=="Placebo"),][2])*100
part_cdc_sd = as.numeric(part_sd[which(part_sd$individual_treatment=="CDC Health"),][2])*100
part_lc_sd = as.numeric(part_sd[which(part_sd$individual_treatment=="Low Cash"),][2])*100
part_hc_sd = as.numeric(part_sd[which(part_sd$individual_treatment=="High Cash"),][2])*100
part_sample_sd = sd(w_tab$Part, na.rm = TRUE)*100

UnE_pla_sd = as.numeric(UnE_sd[which(UnE_sd$individual_treatment=="Placebo"),][2])*100
UnE_cdc_sd = as.numeric(UnE_sd[which(UnE_sd$individual_treatment=="CDC Health"),][2])*100
UnE_lc_sd = as.numeric(UnE_sd[which(UnE_sd$individual_treatment=="Low Cash"),][2])*100
UnE_hc_sd = as.numeric(UnE_sd[which(UnE_sd$individual_treatment=="High Cash"),][2])*100
UnE_sample_sd = sd(w_tab$UnE, na.rm = TRUE)*100

# remove outliers
combo$Q144[which(combo$Q144>1000)] = NA
#Average weekly spending on food total
food_total <- mean(combo$Q144, na.rm=TRUE)

#Average weekly spending on food per treatment
food_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q144, na.rm = TRUE))

food_pla <- food_treatment$mean[food_treatment$individual_treatment == "Placebo"][1]
food_cdc <- food_treatment$mean[food_treatment$individual_treatment == "CDC Health"][1]
food_lc <- food_treatment$mean[food_treatment$individual_treatment == "Low Cash"][1]
food_hc <- food_treatment$mean[food_treatment$individual_treatment == "High Cash"][1]

# New added SDs: 
food_total_sd <- sd(combo$Q144, na.rm=TRUE)
#Average weekly spending on food per treatment
food_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(Q144, na.rm = TRUE))
food_pla_sd <- food_treatment_sd$mean[food_treatment_sd$individual_treatment == "Placebo"][1]
food_cdc_sd <- food_treatment_sd$mean[food_treatment_sd$individual_treatment == "CDC Health"][1]
food_lc_sd <- food_treatment_sd$mean[food_treatment_sd$individual_treatment == "Low Cash"][1]
food_hc_sd <- food_treatment_sd$mean[food_treatment_sd$individual_treatment == "High Cash"][1]

#Average weekly spending on non-food total
combo$Q145[which(combo$Q145>500)] = NA
non_food_total <- mean(combo$Q145, na.rm=TRUE)

#Average weekly spending on non-food per treatment
non_food_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q145, na.rm = TRUE))

non_food_pla <- non_food_treatment$mean[non_food_treatment$individual_treatment == "Placebo"][1]
non_food_cdc <- non_food_treatment$mean[non_food_treatment$individual_treatment == "CDC Health"][1]
non_food_lc <- non_food_treatment$mean[non_food_treatment$individual_treatment == "Low Cash"][1]
non_food_hc <- non_food_treatment$mean[non_food_treatment$individual_treatment == "High Cash"][1]

# NEw SD code: 
non_food_total_sd <- sd(combo$Q145, na.rm=TRUE)

#Average weekly spending on non-food per treatment
non_food_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(Q145, na.rm = TRUE))

non_food_pla_sd <- non_food_treatment_sd$mean[non_food_treatment_sd$individual_treatment == "Placebo"][1]
non_food_cdc_sd <- non_food_treatment_sd$mean[non_food_treatment_sd$individual_treatment == "CDC Health"][1]
non_food_lc_sd <- non_food_treatment_sd$mean[non_food_treatment_sd$individual_treatment == "Low Cash"][1]
non_food_hc_sd <- non_food_treatment_sd$mean[non_food_treatment_sd$individual_treatment == "High Cash"][1]

# code from Matze no.3 : 
combo$Q146[combo$Q146=="Very bad"] <- "1"
combo$Q146[combo$Q146=="Bad"] <- "2"
combo$Q146[combo$Q146=="Neither good nor bad"] <- "3"
combo$Q146[combo$Q146=="Good"] <- "4"
combo$Q146[combo$Q146=="Very good"] <- "5"
combo$Q146 <- as.numeric(combo$Q146)

#5-point finances score total
finances_total <- mean(combo$Q146, na.rm=TRUE)

#5-point finances score per treatment
finances_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(Q146))

finances_pla <- finances_treatment$mean[finances_treatment$individual_treatment == "Placebo"][1]
finances_cdc <- finances_treatment$mean[finances_treatment$individual_treatment == "CDC Health"][1]
finances_lc <- finances_treatment$mean[finances_treatment$individual_treatment == "Low Cash"][1]
finances_hc <- finances_treatment$mean[finances_treatment$individual_treatment == "High Cash"][1]

# NEW SD: 
finances_total_sd <- sd(combo$Q146, na.rm=TRUE)

#5-point finances score per treatment
finances_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(Q146))

finances_pla_sd <- finances_treatment_sd$mean[finances_treatment_sd$individual_treatment == "Placebo"][1]
finances_cdc_sd <- finances_treatment_sd$mean[finances_treatment_sd$individual_treatment == "CDC Health"][1]
finances_lc_sd <- finances_treatment_sd$mean[finances_treatment_sd$individual_treatment == "Low Cash"][1]
finances_hc_sd <- finances_treatment_sd$mean[finances_treatment_sd$individual_treatment == "High Cash"][1]

# % School total
school_total <- combo %>%
  group_by(Q2.5) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

#Never attended
school_total_never <- school_total$percentage[school_total$Q2.5 == "Never attended"][1]

#MiddleSchool or more
school_total_middle_more <- school_total$percentage[school_total$Q2.5 == "Bachelor degree"][1] +
  school_total$percentage[school_total$Q2.5 == "JSS/JHS"][1] + school_total$percentage[school_total$Q2.5 == "Middle"][1] +
  school_total$percentage[school_total$Q2.5 == "Post-secondary diploma"][1] +
  school_total$percentage[school_total$Q2.5 == "Post graduate (Cert. Diploma Masters PHD etc)"][1] +
  school_total$percentage[school_total$Q2.5 == "Post middle/secondary certificate"][1] +
  school_total$percentage[school_total$Q2.5 == "SSS/SHS"][1] + school_total$percentage[school_total$Q2.5 == "Vocational/Technical/Commercial"][1]

# school total - never attended: 
# % School per treatment
school_treatment <- combo %>%
  group_by(individual_treatment, Q2.5) %>%
  summarise(n = n()) %>%
  mutate(percentage = 100 * n / sum(n))

#Never attended
school_pla_never <- school_treatment$percentage[school_treatment$Q2.5 == "Never attended" & school_treatment$individual_treatment == "Placebo"][1]
school_cdc_never <- school_treatment$percentage[school_treatment$Q2.5 == "Never attended" & school_treatment$individual_treatment == "CDC Health"][1]
school_lc_never <- school_treatment$percentage[school_treatment$Q2.5 == "Never attended" & school_treatment$individual_treatment == "Low Cash"][1]
school_hc_never <- school_treatment$percentage[school_treatment$Q2.5 == "Never attended" & school_treatment$individual_treatment == "High Cash"][1]

# New: SD for never attended - so simple bivariate variable: 
# school yes/no

w_cmb = combo[,c("individual_treatment","Q2.5")]
w_cmb$Attended = ifelse(w_cmb$Q2.5 == "Never attended", 1, 0)
school_total_sd <- w_cmb %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Attended, na.rm = TRUE)) 

school_pla_never_sd = as.numeric(school_total_sd[which(school_total_sd$individual_treatment=="Placebo"),][2])*100
school_cdc_never_sd = as.numeric(school_total_sd[which(school_total_sd$individual_treatment=="CDC Health"),][2])*100
school_lc_never_sd = as.numeric(school_total_sd[which(school_total_sd$individual_treatment=="Low Cash"),][2])*100
school_hc_never_sd = as.numeric(school_total_sd[which(school_total_sd$individual_treatment=="High Cash"),][2])*100
school_never_sd = sd(w_cmb$Attended, na.rm = TRUE)*100

#MiddleSchool or more

school_pla_middle_more <- school_treatment$percentage[school_treatment$Q2.5 == "Bachelor degree" & school_treatment$individual_treatment == "Placebo"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "JSS/JHS" & school_treatment$individual_treatment == "Placebo"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Middle" & school_treatment$individual_treatment == "Placebo"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post-secondary diploma" & school_treatment$individual_treatment == "Placebo"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post graduate (Cert. Diploma Masters PHD etc)" & school_treatment$individual_treatment == "Placebo"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post middle/secondary certificate" & school_treatment$individual_treatment == "Placebo"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "SSS/SHS" & school_treatment$individual_treatment == "Placebo"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Vocational/Technical/Commercial" & school_treatment$individual_treatment == "Placebo"][1]

school_cdc_middle_more <- school_treatment$percentage[school_treatment$Q2.5 == "Bachelor degree" & school_treatment$individual_treatment == "CDC Health"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "JSS/JHS" & school_treatment$individual_treatment == "CDC Health"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Middle" & school_treatment$individual_treatment == "CDC Health"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post-secondary diploma" & school_treatment$individual_treatment == "CDC Health"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post graduate (Cert. Diploma Masters PHD etc)" & school_treatment$individual_treatment == "CDC Health"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "SSS/SHS" & school_treatment$individual_treatment == "CDC Health"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Vocational/Technical/Commercial" & school_treatment$individual_treatment == "CDC Health"][1]

school_lc_middle_more <- school_treatment$percentage[school_treatment$Q2.5 == "Bachelor degree" & school_treatment$individual_treatment == "Low Cash"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "JSS/JHS" & school_treatment$individual_treatment == "Low Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Middle" & school_treatment$individual_treatment == "Low Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post-secondary diploma" & school_treatment$individual_treatment == "Low Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post middle/secondary certificate" & school_treatment$individual_treatment == "Low Cash"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "SSS/SHS" & school_treatment$individual_treatment == "Low Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Vocational/Technical/Commercial" & school_treatment$individual_treatment == "Low Cash"][1]

school_hc_middle_more <- school_treatment$percentage[school_treatment$Q2.5 == "Bachelor degree" & school_treatment$individual_treatment == "High Cash"][1] + 
  school_treatment$percentage[school_treatment$Q2.5 == "JSS/JHS" & school_treatment$individual_treatment == "High Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Middle" & school_treatment$individual_treatment == "High Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post-secondary diploma" & school_treatment$individual_treatment == "High Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Post graduate (Cert. Diploma Masters PHD etc)" & school_treatment$individual_treatment == "High Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "SSS/SHS" & school_treatment$individual_treatment == "High Cash"][1] +
  school_treatment$percentage[school_treatment$Q2.5 == "Vocational/Technical/Commercial" & school_treatment$individual_treatment == "High Cash"][1]

### middle school - SD - Yes no: 
w_cmb2 = combo[,c("individual_treatment","Q2.5")]
w_cmb2$Middle = ifelse(w_cmb2$Q2.5 %in% c("Vocational/Technical/Commercial",
                                          "SSS/SHS","Post middle/secondary certificate", 
                                          "Post graduate (Cert. Diploma Masters PHD etc)",
                                          "Post-secondary diploma", "Middle",
                                          "JSS/JHS", "Bachelor degree") , 1, 0)

middle_total_sd <- w_cmb2 %>%
  group_by(individual_treatment) %>%
  summarise(sd = sd(Middle, na.rm = TRUE)) 

school_pla_middle_sd = as.numeric(middle_total_sd[which(middle_total_sd$individual_treatment=="Placebo"),][2])*100
school_cdc_middle_sd = as.numeric(middle_total_sd[which(middle_total_sd$individual_treatment=="CDC Health"),][2])*100
school_lc_middle_sd = as.numeric(middle_total_sd[which(middle_total_sd$individual_treatment=="Low Cash"),][2])*100
school_hc_middle_sd = as.numeric(middle_total_sd[which(middle_total_sd$individual_treatment=="High Cash"),][2])*100
school_middle_sd = sd(w_cmb2$Middle, na.rm = TRUE)*100


# Distance
dist_total <- mean(combo$clinic_distance[which(combo$individual_treatment %in% c("Placebo","CDC Health", "Low Cash","High Cash" ))], na.rm=TRUE)

dist_treatment <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = mean(clinic_distance,  na.rm = TRUE))

dist_pla <- dist_treatment$mean[dist_treatment$individual_treatment == "Placebo"][1]
dist_cdc <- dist_treatment$mean[dist_treatment$individual_treatment == "CDC Health"][1]
dist_lc <- dist_treatment$mean[dist_treatment$individual_treatment == "Low Cash"][1]
dist_hc <- dist_treatment$mean[dist_treatment$individual_treatment == "High Cash"][1]

### SD: 
dist_total_sd <- sd(combo$clinic_distance[which(combo$individual_treatment %in% c("Placebo","CDC Health", "Low Cash","High Cash" ))], na.rm=TRUE)

dist_treatment_sd <- combo %>%
  group_by(individual_treatment) %>%
  summarise(mean = sd(clinic_distance,  na.rm = TRUE))

dist_pla_sd <- dist_treatment_sd$mean[dist_treatment_sd$individual_treatment == "Placebo"][1]
dist_cdc_sd <- dist_treatment_sd$mean[dist_treatment_sd$individual_treatment == "CDC Health"][1]
dist_lc_sd <- dist_treatment_sd$mean[dist_treatment_sd$individual_treatment == "Low Cash"][1]
dist_hc_sd <- dist_treatment_sd$mean[dist_treatment_sd$individual_treatment == "High Cash"][1]


################# : 
# Manuscript: Table 1: Summary Statistics for Survey Results from Phase I  

#### Build a new final Table: 
e = c(0)
#Vectors for the table
Total_Sample <- round(c(total_num_villages, subj_total, vii_total_1, v_cert_total, su_total_1, buy_solar_total, female_total, age_total,
                        h_size_total, num_children_total, emp_total_full, emp_total_part, emp_total_unemp, food_total, non_food_total, finances_total, school_total_never,
                        school_total_middle_more, dist_total), digits=1)

Total_Sample_sd = as.numeric(format(round(c(e, e, vii_total_sd, v_cert_total_sd, su_total_sd, buy_solar_total_sd, female_total_sd, age_total_sd,
                                            h_size_total_sd, num_children_total_sd, full_sample_sd, part_sample_sd, UnE_sample_sd, food_total_sd, non_food_total_sd, finances_total_sd, school_never_sd,
                                            school_middle_sd, dist_total_sd),1) , nsmall = 1))

Placebo <- round(c(villages_pla, subj_t1, vii_pla_1, v_cert_pla, su_pla_1, buy_solar_pla, female_pla, age_pla,
                   h_size_pla, num_children_pla, emp_pla_full, emp_pla_part, emp_pla_unemp, food_pla, non_food_pla, finances_pla, school_pla_never,
                   school_pla_middle_more, dist_pla), digits=1)

Placebo_sd <-  as.numeric(format(round(c(e, e, vii_pla_sd,v_cert_pla_sd, su_pla_sd, buy_solar_pla_sd, female_pla_sd, age_pla_sd, 
                                         h_size_pla_sd, num_children_pla_sd, full_pla_sd, part_pla_sd, UnE_pla_sd, food_pla_sd, non_food_pla_sd, finances_pla_sd, school_pla_never_sd,
                                         school_pla_middle_sd, dist_pla_sd), digits=1), nsmall = 1))

Health_Message <- round(c(villages_cdc, subj_t2, vii_cdc_1, v_cert_cdc, su_cdc_1, buy_solar_cdc, female_cdc, age_cdc,
                          h_size_cdc, num_children_cdc, emp_cdc_full, emp_cdc_part, emp_cdc_unemp, food_cdc, non_food_cdc, finances_cdc, school_cdc_never,
                          school_cdc_middle_more, dist_cdc), digits=1)
Health_Message_sd <-  as.numeric(format(round(c(e, e, vii_cdc_sd,v_cert_cdc_sd, su_cdc_sd , buy_solar_cdc_sd, female_cdc_sd, age_cdc_sd, 
                                                h_size_cdc_sd, num_children_cdc_sd, full_cdc_sd, part_cdc_sd, UnE_cdc_sd, food_cdc_sd, non_food_cdc_sd, finances_cdc_sd, school_cdc_never_sd,
                                                school_cdc_middle_sd, dist_cdc_sd), digits=1), nsmall = 1))

Low_Cost <- round(c(villages_lc, subj_t3, vii_lc_1, v_cert_lc, su_lc_1, buy_solar_lc, female_lc, age_lc,
                    h_size_lc, num_children_lc, emp_lc_full, emp_lc_part, emp_lc_unemp, food_lc, non_food_lc, finances_lc, school_lc_never,
                    school_lc_middle_more, dist_lc), digits=1)

Low_Cost_sd <-  as.numeric(format(round(c(e, e, vii_lc_sd,v_cert_lc_sd, su_lc_sd , buy_solar_lc_sd, female_lc_sd, age_lc_sd, 
                                          h_size_lc_sd, num_children_lc_sd, full_lc_sd, part_lc_sd, UnE_lc_sd, food_lc_sd, non_food_lc_sd, finances_lc_sd, school_lc_never_sd,
                                          school_lc_middle_sd, dist_lc_sd), digits=1), nsmall = 1))

High_Cost <- round(c(villages_hc, subj_t4, vii_hc_1, v_cert_hc, su_hc_1, buy_solar_hc, female_hc, age_hc,
                     h_size_hc, num_children_hc, emp_hc_full, emp_hc_part, emp_hc_unemp, food_hc, non_food_hc, finances_hc, school_hc_never,
                     school_hc_middle_more, dist_hc), digits=1)

High_Cost_sd <-  as.numeric(format(round(c(e, e, vii_hc_sd,v_cert_hc_sd, su_hc_sd, buy_solar_hc_sd, female_hc_sd, age_hc_sd, 
                                           h_size_hc_sd, num_children_hc_sd, full_hc_sd, part_hc_sd, UnE_hc_sd, food_hc_sd, non_food_hc_sd, finances_hc_sd, school_hc_never_sd,
                                           school_hc_middle_sd, dist_hc_sd), digits=1), nsmall = 1))


col1 = c()
for (i in c(1:19)){
  if (i %in% c(1,2)){
    col1[i] = Total_Sample[i]
  } else {
    col1[i] = as.character(paste(Total_Sample[i],paste("(",Total_Sample_sd[i],")", sep ="")))
  }
  
}
col1



col2 = c()
for (i in c(1:19)){
  if (i %in% c(1,2)){
    col2[i] = Placebo[i]
  } else {
    col2[i] = as.character(paste(Placebo[i],paste("(",Placebo_sd[i],")", sep ="")))
  }
  
}
col2


col3 = c()
for (i in c(1:19)){
  if (i %in% c(1,2)){
    col3[i] = Health_Message[i]
  } else {
    col3[i] = as.character(paste(Health_Message[i],paste("(",Health_Message_sd[i],")", sep ="")))
  }
  
}
col3




col4 = c()
for (i in c(1:19)){
  if (i %in% c(1,2)){
    col4[i] = Low_Cost[i]
  } else {
    col4[i] = as.character(paste(Low_Cost[i],paste("(",Low_Cost_sd[i],")", sep ="")))
  }
  
}
col4



col5 = c()
for (i in c(1:19)){
  if (i %in% c(1,2)){
    col5[i] = High_Cost[i]
  } else {
    col5[i] = as.character(paste(High_Cost[i],paste("(",High_Cost_sd[i],")", sep ="")))
  }
  
}
col5

test = cbind(col1, col2, col3, col4, col5)
library(htmlTable)
htmlTable(test)
img_col_names <- c("Number of Villages", "Number of Subjects", "Vaccine Intention", "Certainty of vacccination", "Solar Device intention",
                   "Certainty of solar intention", "% Female", "Mean age", "Mean household size", "Mean number of children < 18",
                   "% Employed full time", "% Employed part time", "% Unemployed", "Mean weekly spend food", "Mean weekly spend non-food",
                   "Average finances score", "% Never attended school", "% Middle school or greater", "Distance to clinic in km")


#Creating table
df_fig_tt <- data.frame(img_col_names, test)
colnames(df_fig_tt) <- c("", "Total Sample", "Placebo", "Health Message", "Low Cost", "High Cost")
#htmlTable(df_fig_1)
rownames(df_fig_tt) = df_fig_tt[,1]
df_fig_tt[,1] = NULL
htmlTable(df_fig_tt)

htmlTable(df_fig_tt, 
          tfoot  = "- For the average weekly spending on food and average spending on non-food outliers (above 1000 and 500 repectively) have been removed.")

library(xtable)
print.xtable(df_fig_tt, file = "Main_Table1.tex", compress = FALSE)
#xtable(df_fig_tt, type = "latex", file = "Data Analysis/Summary Statistics Phase 1/Phase1.txt")





